In [1]:
# load required libraries
options(stringsAsFactors = F)
options (repr.plot.width = 12, repr.plot.height = 5)
suppressPackageStartupMessages({
library(Seurat)
library(harmony)
library(ggplot2)
library(dplyr)
library(Matrix)
library(Hmisc)
library(viridis)
library(RColorBrewer)
library(ggrepel)
library(cowplot)
})
set.seed(123)
In [2]:
# load samples
sample_ctrl <- readRDS("./matrix/subsample_DCM_annotated.rds")
sample_ctrl

sample_exp <- readRDS("./matrix/subsample_CAD_annotated.rds")
sample_exp
An object of class Seurat 
33538 features across 23538 samples within 1 assay 
Active assay: RNA (33538 features, 2000 variable features)
 4 dimensional reductions calculated: pca, harmony, tsne, umap
An object of class Seurat 
33538 features across 18304 samples within 1 assay 
Active assay: RNA (33538 features, 2000 variable features)
 4 dimensional reductions calculated: pca, harmony, tsne, umap
In [3]:
# merge sample and normalize
scList <- list(sample_ctrl, sample_exp)
sample <- merge(scList[[1]], scList[[2]])
# sample <- subset(sample, nFeature_RNA >= 500 & nFeature_RNA <= 5000 & percent.mt <= 10)
sample <- NormalizeData(sample, normalization.method = "LogNormalize", scale.factor = 10000)

# find variable Genes and scale data
sample <- FindVariableFeatures(sample, selection.method = "vst")
sample <- ScaleData(sample)
sample
Warning message in CheckDuplicateCellNames(object.list = objects):
“Some cell names are duplicated across objects provided. Renaming to enforce unique cell names.”
Centering and scaling data matrix

An object of class Seurat 
33538 features across 41842 samples within 1 assay 
Active assay: RNA (33538 features, 2000 variable features)
In [4]:
# run pca and harmony
sample <- RunPCA(sample, verbose = FALSE)
sample <- RunHarmony(sample, group.by.vars = "source", verbose = FALSE)
DimPlot(sample, reduction = "harmony", pt.size = 0.1, group.by = "source")
ElbowPlot(sample, ndims = 30)
Warning message:
“Invalid name supplied, making object name syntactically valid. New object name is Seurat..ProjectDim.RNA.harmony; see ?make.names for more details on syntax validity”
No description has been provided for this image
No description has been provided for this image
In [5]:
# dimension reduction and clustering
pca_dims <- 1:30
sample <- RunTSNE(sample, dims = pca_dims, reduction = "harmony", verbose = FALSE)
sample <- RunUMAP(sample, dims = pca_dims, reduction = "harmony", verbose = FALSE)
sample <- FindNeighbors(sample, dims = pca_dims, reduction = "harmony", verbose = FALSE)
sample <- FindClusters(sample, resolution = 0.5, verbose = FALSE)
DimPlot(sample, label=TRUE, reduction = "tsne", group.by = "ident", pt.size = 0.1)
DimPlot(sample, label=TRUE, reduction = "umap", group.by = "ident", pt.size = 0.1)
Warning message:
“The default method for RunUMAP has changed from calling Python UMAP via reticulate to the R-native UWOT using the cosine metric
To use Python UMAP via reticulate, set umap.method to 'umap-learn' and metric to 'correlation'
This message will be shown once per session”
No description has been provided for this image
No description has been provided for this image
In [6]:
# add resolution
sample <- FindClusters(sample, resolution = 1, verbose = FALSE)
DimPlot(sample, label=TRUE, reduction = "tsne", group.by = "ident", pt.size = 0.1)
DimPlot(sample, label=TRUE, reduction = "umap", group.by = "ident", pt.size = 0.1)
No description has been provided for this image
No description has been provided for this image
In [7]:
# quality control by cluster
VlnPlot(sample, features = c("nFeature_RNA","nCount_RNA","percent.mt"), group.by = "seurat_clusters", ncol = 1, pt.size = 0.1)
ggsave("figure/qc_sample_by_cluster.pdf", width = 10, height = 10)
No description has been provided for this image
In [8]:
# remove cluster 18 and 21
sample <- subset(sample, idents = c(18, 21), invert = T)
sample <- NormalizeData(sample, normalization.method = "LogNormalize", scale.factor = 10000)

# find variable Genes and scale data
sample <- FindVariableFeatures(sample, selection.method = "vst")
sample <- ScaleData(sample)
sample

# run pca and harmony
sample <- RunPCA(sample, verbose = FALSE)
sample <- RunHarmony(sample, group.by.vars = "source", verbose = FALSE)
DimPlot(sample, reduction = "harmony", pt.size = 0.1, group.by = "source")
ElbowPlot(sample, ndims = 30)

# dimension reduction and clustering
pca_dims <- 1:30
sample <- RunTSNE(sample, dims = pca_dims, reduction = "harmony", verbose = FALSE)
sample <- RunUMAP(sample, dims = pca_dims, reduction = "harmony", verbose = FALSE)
sample <- FindNeighbors(sample, dims = pca_dims, reduction = "harmony", verbose = FALSE)
sample <- FindClusters(sample, resolution = 0.5, verbose = FALSE)
DimPlot(sample, label=TRUE, reduction = "tsne", group.by = "ident", pt.size = 0.1)
DimPlot(sample, label=TRUE, reduction = "umap", group.by = "ident", pt.size = 0.1)
Centering and scaling data matrix

An object of class Seurat 
33538 features across 41296 samples within 1 assay 
Active assay: RNA (33538 features, 2000 variable features)
 4 dimensional reductions calculated: pca, harmony, tsne, umap
Warning message:
“Invalid name supplied, making object name syntactically valid. New object name is Seurat..ProjectDim.RNA.harmony; see ?make.names for more details on syntax validity”
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
In [9]:
# quality control by cluster
VlnPlot(sample, features = c("nFeature_RNA","nCount_RNA","percent.mt"), group.by = "seurat_clusters", ncol = 1, pt.size = 0.1)
ggsave("figure/qc_sample_by_cluster_sub1.pdf", width = 10, height = 10)
No description has been provided for this image
In [11]:
# remove cluster 15 and 16
sample <- subset(sample, idents = c(15, 16), invert = T)
sample <- NormalizeData(sample, normalization.method = "LogNormalize", scale.factor = 10000)

# find variable Genes and scale data
sample <- FindVariableFeatures(sample, selection.method = "vst")
sample <- ScaleData(sample)
sample

# run pca and harmony
sample <- RunPCA(sample, verbose = FALSE)
sample <- RunHarmony(sample, group.by.vars = "source", verbose = FALSE)
DimPlot(sample, reduction = "harmony", pt.size = 0.1, group.by = "source")
ElbowPlot(sample, ndims = 30)

# dimension reduction and clustering
pca_dims <- 1:30
sample <- RunTSNE(sample, dims = pca_dims, reduction = "harmony", verbose = FALSE)
sample <- RunUMAP(sample, dims = pca_dims, reduction = "harmony", verbose = FALSE)
sample <- FindNeighbors(sample, dims = pca_dims, reduction = "harmony", verbose = FALSE)
sample <- FindClusters(sample, resolution = 0.5, verbose = FALSE)
DimPlot(sample, label=TRUE, reduction = "tsne", group.by = "ident", pt.size = 0.1)
DimPlot(sample, label=TRUE, reduction = "umap", group.by = "ident", pt.size = 0.1)
Centering and scaling data matrix

An object of class Seurat 
33538 features across 41078 samples within 1 assay 
Active assay: RNA (33538 features, 2000 variable features)
 4 dimensional reductions calculated: pca, harmony, tsne, umap
Warning message:
“Invalid name supplied, making object name syntactically valid. New object name is Seurat..ProjectDim.RNA.harmony; see ?make.names for more details on syntax validity”
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
In [12]:
# plot tsne and umap
a <- DimPlot(sample, label = TRUE, reduction = "umap", group.by = "celltype")
b <- DimPlot(sample, label = TRUE, reduction = "tsne", group.by = "celltype")
plot_grid(a, b, ncol = 2)
No description has been provided for this image
In [13]:
# plot split umap
sample$condition <- "undefined"
sample$condition[grep("CTRL", sample$orig.ident)] <- "CTRL"
sample$condition[grep("EXP", sample$orig.ident)] <- "EXP"
DimPlot(sample, label = TRUE, reduction = "umap", group.by = "celltype", split.by = "condition")
No description has been provided for this image
In [15]:
# save the integrated sample
saveRDS(sample, file = "./rds/sample_DCM_CAD_integrated.rds")
In [16]:
# list the session info
sessionInfo()
R version 4.3.1 (2023-06-16)
Platform: x86_64-conda-linux-gnu (64-bit)
Running under: Ubuntu 18.04.5 LTS

Matrix products: default
BLAS/LAPACK: /data/zju/ty/miniconda/envs/singlecell/lib/libopenblasp-r0.3.3.so;  LAPACK version 3.8.0

locale:
 [1] LC_CTYPE=en_US.UTF-8       LC_NUMERIC=C              
 [3] LC_TIME=zh_CN.UTF-8        LC_COLLATE=en_US.UTF-8    
 [5] LC_MONETARY=zh_CN.UTF-8    LC_MESSAGES=en_US.UTF-8   
 [7] LC_PAPER=zh_CN.UTF-8       LC_NAME=C                 
 [9] LC_ADDRESS=C               LC_TELEPHONE=C            
[11] LC_MEASUREMENT=zh_CN.UTF-8 LC_IDENTIFICATION=C       

time zone: Asia/Shanghai
tzcode source: system (glibc)

attached base packages:
[1] stats     graphics  grDevices utils     datasets  methods   base     

other attached packages:
 [1] cowplot_1.1.1      ggrepel_0.9.4      RColorBrewer_1.1-3 viridis_0.6.4     
 [5] viridisLite_0.4.2  Hmisc_5.1-1        Matrix_1.6-1.1     dplyr_1.1.4       
 [9] ggplot2_3.5.1      harmony_0.1.1      Rcpp_1.0.11        SeuratObject_4.1.4
[13] Seurat_4.4.0      

loaded via a namespace (and not attached):
  [1] rstudioapi_0.17.1      jsonlite_1.8.7         magrittr_2.0.3        
  [4] spatstat.utils_3.1-1   rmarkdown_2.25         farver_2.1.1          
  [7] vctrs_0.6.4            ROCR_1.0-11            spatstat.explore_3.2-3
 [10] base64enc_0.1-3        htmltools_0.5.6.1      Formula_1.2-5         
 [13] sctransform_0.4.0      parallelly_1.36.0      KernSmooth_2.23-22    
 [16] htmlwidgets_1.6.2      ica_1.0-3              plyr_1.8.9            
 [19] plotly_4.10.2          zoo_1.8-12             uuid_1.1-1            
 [22] igraph_1.5.1           mime_0.12              lifecycle_1.0.3       
 [25] pkgconfig_2.0.3        R6_2.5.1               fastmap_1.1.1         
 [28] fitdistrplus_1.1-11    future_1.33.0          shiny_1.7.5           
 [31] digest_0.6.33          colorspace_2.1-0       patchwork_1.3.0.9000  
 [34] tensor_1.5             irlba_2.3.5.1          progressr_0.14.0      
 [37] fansi_1.0.5            spatstat.sparse_3.0-2  httr_1.4.7            
 [40] polyclip_1.10-6        abind_1.4-5            compiler_4.3.1        
 [43] withr_2.5.1            htmlTable_2.4.1        backports_1.4.1       
 [46] MASS_7.3-60            tools_4.3.1            foreign_0.8-85        
 [49] lmtest_0.9-40          httpuv_1.6.11          future.apply_1.11.0   
 [52] nnet_7.3-19            goftest_1.2-3          glue_1.6.2            
 [55] nlme_3.1-163           promises_1.2.1         grid_4.3.1            
 [58] pbdZMQ_0.3-10          checkmate_2.2.0        Rtsne_0.16            
 [61] cluster_2.1.4          reshape2_1.4.4         generics_0.1.3        
 [64] gtable_0.3.4           spatstat.data_3.0-1    tidyr_1.3.1           
 [67] data.table_1.14.8      sp_2.1-0               utf8_1.2.3            
 [70] spatstat.geom_3.2-5    RcppAnnoy_0.0.21       RANN_2.6.1            
 [73] pillar_1.9.0           stringr_1.5.0          IRdisplay_1.1         
 [76] later_1.3.1            splines_4.3.1          lattice_0.21-9        
 [79] survival_3.5-7         deldir_1.0-9           tidyselect_1.2.0      
 [82] miniUI_0.1.1.1         pbapply_1.7-2          knitr_1.44            
 [85] gridExtra_2.3          scattermore_1.2        xfun_0.40             
 [88] matrixStats_1.0.0      stringi_1.7.12         lazyeval_0.2.2        
 [91] evaluate_0.22          codetools_0.2-19       tibble_3.2.1          
 [94] cli_3.6.3              uwot_0.1.16            IRkernel_1.3.2        
 [97] rpart_4.1.21           xtable_1.8-4           reticulate_1.34.0     
[100] repr_1.1.6             munsell_0.5.0          globals_0.16.2        
[103] spatstat.random_3.1-6  png_0.1-8              parallel_4.3.1        
[106] ellipsis_0.3.2         listenv_0.9.0          scales_1.3.0          
[109] ggridges_0.5.4         leiden_0.4.3           purrr_1.0.2           
[112] crayon_1.5.2           rlang_1.1.4           
In [ ]: